package com.kdtech.analyse.news;
import java.util.Objects;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.kdtech.analyse.AnalyseNews;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.crawler.at.UrlArgumentTop;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.HtmlCleaner;
import com.kdtech.utils.StringUtils;

/**
 * News analyser for http://www.fangyuan365.com/ (Changsha real-estate network).
 *
 * Recognises the site's article detail URLs and extracts the title, the
 * publication date and the body HTML from a downloaded page.
 */
public class Fangyuan365NewsAnalyse implements AnalyseNews{

	/**
	 * URL shapes accepted as article detail pages. Each pattern must match the
	 * whole URL. They are compiled once because the set never changes.
	 */
	private static final Pattern[] DETAIL_PAGE_PATTERNS={
			Pattern.compile("http://www[.]fangyuan365[.]com/article/view_[0-9]*_1[.]html"),
			Pattern.compile("http://.*[.]fangyuan365[.]com/article/List[.]asp[?]ID=[0-9]+"),
			Pattern.compile("http://.*[.]fangyuan365[.]com/article/List[.]asp[?]id=[0-9]+"),
			Pattern.compile("http://www[.]fangyuan365[.]com/.*/List[.]asp[?]ID=[0-9]*")
	};

	/** Title selectors, tried in order until one yields non-blank text. */
	private static final String[] TITLE_SELECTORS={
			"div.left_title",
			"td.huiboder h3:eq(1)",
			"div.YH24"
	};

	/**
	 * Tells whether a URL has the shape of an article detail page of this site.
	 *
	 * @param url the URL to test; may be null
	 * @return true when the whole URL matches one of the known detail-page
	 *         patterns, false otherwise (including for a null URL)
	 */
	public boolean isDetailPage(String url) {
		if (url == null) {
			return false;
		}
		for (Pattern pattern : DETAIL_PAGE_PATTERNS) {
			if (pattern.matcher(url).matches()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Parses a downloaded page into a {@link NewsMeta} carrying the page URL,
	 * title, content HTML and date.
	 *
	 * The URL is not checked against {@link #isDetailPage(String)}; any page
	 * that is handed in gets parsed.
	 *
	 * @param urlMeta the crawled page; must not be null and must carry HTML
	 * @return a new NewsMeta filled from the page
	 * @throws NullPointerException if urlMeta or its HTML is null
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		Objects.requireNonNull(urlMeta, "urlMeta must not be null");
		String html=Objects.requireNonNull(urlMeta.getHtml(),
				"urlMeta.getHtml() returned null; there is nothing to parse");
		String url=urlMeta.getUrl();

		Document doc=Jsoup.parse(html);
		// Drop these elements before any text is read so they cannot leak into
		// the extracted fields.
		doc.select(".kaiti14pxred,.kaiti14pxred2").remove();

		// Title and date are read before the content is handed to HtmlCleaner,
		// in case the cleaner modifies the document it is given.
		String title=extractTitle(doc);
		Long date=extractDate(doc);
		String content=extractContent(url, doc);

		// NOTE(review): neither an author nor a source name is stored on the
		// result. An earlier draft read the author from div.left_origin;
		// confirm whether NewsMeta should carry these fields.
		NewsMeta news=new NewsMeta();
		news.setUrl(url);
		news.setTitle(StringUtils.trimSpace(title));
		news.setContent(content);
		news.setDate(date);
		return news;
	}

	/** Returns the text of the first title selector that is not blank, or the last (blank) text tried. */
	private static String extractTitle(Document doc) {
		String title="";
		for (String selector : TITLE_SELECTORS) {
			// Elements.text() yields an empty string, never null, when nothing
			// matches, so blankness is the only usable "not found" signal.
			title=doc.select(selector).text();
			if (!StringUtils.isBlank(title)) {
				break;
			}
		}
		return title;
	}

	/**
	 * Reads the date from the time/STYLE1 elements and falls back to the
	 * div.NewsArtbq block when that gives nothing.
	 */
	private static Long extractDate(Document doc) {
		Long date=DateUtils.matchDate(doc.select("span.time,a.STYLE1").text());
		// Assumes matchDate signals "no date found" with null - TODO confirm.
		if (date == null) {
			date=DateUtils.matchDate(doc.select("div.NewsArtbq").text());
		}
		return date;
	}

	/** Returns the cleaned body HTML from div.left_content, or from td.news14px when the former is blank. */
	private static String extractContent(String url, Document doc) {
		String content=HtmlCleaner.getContentHtml(url,doc.select("div.left_content"));
		if (StringUtils.isBlank(content)) {
			content=HtmlCleaner.getContentHtml(url,doc.select("td.news14px"));
		}
		return content;
	}

	/**
	 * Update hook; this analyser performs no update.
	 *
	 * @param meta the previously parsed news item (ignored)
	 * @return always null
	 */
	public NewsMeta Update(NewsMeta meta) {
		return null;
	}

	/**
	 * Manual smoke test: formats a sample URL, prints whether it is a detail
	 * page, fetches it and prints the parsed result.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Fangyuan365NewsAnalyse analyser=new Fangyuan365NewsAnalyse();
		String url="http://www.fangyuan365.com/article/List.asp?ID=9368";
		String formattedUrl=UrlArgumentTop.FromatUrl(url);
		System.out.println("url="+formattedUrl);
		System.out.println(analyser.isDetailPage(formattedUrl));
		UrlMeta meta=CrawlHTML.responseToURL(formattedUrl);
		NewsMeta parsed=analyser.parserHtml(meta);
		System.out.println(parsed);
	}

	/**
	 * Reports whether items from this analyser need a later update pass.
	 *
	 * @return always false
	 */
	public boolean isNeedUpdate() {
		return false;
	}
}
